import os
# check scikit-learn version
import sklearn
print(sklearn.__version__)
1.1.1
%pylab inline
import pandas as pd
import seaborn as sns
Populating the interactive namespace from numpy and matplotlib
%load_ext autoreload
%autoreload 2
# configurations pandas
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_columns', 250)
import warnings
warnings.filterwarnings("ignore")
path = './data'
filename = 'Home_Credit_Default_Risk.csv'
data = pd.read_csv(os.path.join(path, filename))
print(data.shape)
(356255, 425)
data_sampling = data.sample(frac=0.1, random_state=42)
import gc
from sklearn.model_selection import KFold, StratifiedKFold
from lightgbm import LGBMClassifier, early_stopping, log_evaluation
from sklearn.metrics import roc_auc_score
path = './data'
test_filename = 'Test_Home_Credit_Default_Risk.csv'
image_filename = 'Feature_Importance_LightGBM.png'
# Display/plot feature importance
def display_importances(feature_importance_df_, debug=False):
cols = feature_importance_df_[["feature", "importance"]].groupby("feature").mean().sort_values(by="importance", ascending=False)[:40].index
best_features = feature_importance_df_.loc[feature_importance_df_.feature.isin(cols)]
plt.figure(figsize=(8, 10))
sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False), order=cols)
plt.title('LightGBM Features (avg over folds)')
plt.tight_layout()
if not debug:
plt.savefig(os.path.join(path, image_filename))
# LightGBM GBDT with KFold or Stratified KFold
# Parameters from Tilii kernel: https://www.kaggle.com/tilii7/olivier-lightgbm-parameters-by-bayesian-opt/code
def kfold_lightgbm(df, num_folds, stratified=False, debug=False):
# Divide in training/validation and test data
train_df = df[df['TARGET'].notnull()].copy()
test_df = df[df['TARGET'].isnull()].copy()
print("Starting LightGBM. Train shape: {}, test shape: {}".format(train_df.shape, test_df.shape))
del df
gc.collect()
# Cross validation model
if stratified:
folds = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=1001)
else:
folds = KFold(n_splits=num_folds, shuffle=True, random_state=1001)
# Create arrays and dataframes to store results
oof_preds = np.zeros(train_df.shape[0])
sub_preds = np.zeros(test_df.shape[0])
feature_importance_df = pd.DataFrame()
feats = [f for f in train_df.columns if f not in ['TARGET', 'SK_ID_CURR', 'SK_ID_BUREAU', 'SK_ID_PREV', 'index']]
for n_fold, (train_idx, valid_idx) in enumerate(folds.split(train_df[feats], train_df['TARGET'])):
train_x, train_y = train_df[feats].iloc[train_idx], train_df['TARGET'].iloc[train_idx]
valid_x, valid_y = train_df[feats].iloc[valid_idx], train_df['TARGET'].iloc[valid_idx]
# LightGBM parameters found by Bayesian optimization
clf = LGBMClassifier(
n_jobs=8,
n_estimators=10000,
learning_rate=0.02,
num_leaves=34,
colsample_bytree=0.6, # 0.9497036,
subsample=0.8715623,
max_depth=8,
reg_alpha=0.041545473,
reg_lambda=0.0735294,
min_split_gain=0.0222415,
min_child_weight=39.3259775,
verbose=-1,
force_row_wise=True
)
log_evaluation_clb = log_evaluation(period=200)
early_stopping_clb = early_stopping(stopping_rounds=200)
clf.fit(
train_x,
train_y,
eval_set=[(train_x, train_y), (valid_x, valid_y)],
eval_metric='auc',
callbacks=[early_stopping_clb, log_evaluation_clb]
)
oof_preds[valid_idx] = clf.predict_proba(valid_x, num_iteration=clf.best_iteration_)[:, 1]
sub_preds += clf.predict_proba(test_df[feats], num_iteration=clf.best_iteration_)[:, 1] / folds.n_splits
fold_importance_df = pd.DataFrame()
fold_importance_df["feature"] = feats
fold_importance_df["importance"] = clf.feature_importances_
fold_importance_df["fold"] = n_fold + 1
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
print('Fold %2d AUC : %.6f' % (n_fold + 1, roc_auc_score(valid_y, oof_preds[valid_idx])))
del clf, train_x, train_y, valid_x, valid_y
gc.collect()
print('Full AUC score %.6f' % roc_auc_score(train_df['TARGET'], oof_preds))
# Write submission file and plot feature importance
if not debug:
test_df['TARGET'] = sub_preds
test_df[['SK_ID_CURR', 'TARGET']].to_csv(os.path.join(path, test_filename), index=False)
display_importances(feature_importance_df, debug=debug)
return feature_importance_df
feature_importance_df = kfold_lightgbm(data_sampling, num_folds=4, stratified=False, debug=True)
Starting LightGBM. Train shape: (30846, 425), test shape: (4780, 425) Training until validation scores don't improve for 200 rounds [200] training's auc: 0.867334 training's binary_logloss: 0.216124 valid_1's auc: 0.743922 valid_1's binary_logloss: 0.252913 [400] training's auc: 0.910685 training's binary_logloss: 0.193713 valid_1's auc: 0.745769 valid_1's binary_logloss: 0.251898 Early stopping, best iteration is: [312] training's auc: 0.8937 training's binary_logloss: 0.202581 valid_1's auc: 0.746136 valid_1's binary_logloss: 0.251818 Fold 1 AUC : 0.746136 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.86504 training's binary_logloss: 0.216146 valid_1's auc: 0.744107 valid_1's binary_logloss: 0.253102 [400] training's auc: 0.907207 training's binary_logloss: 0.194778 valid_1's auc: 0.746499 valid_1's binary_logloss: 0.25165 [600] training's auc: 0.935452 training's binary_logloss: 0.178235 valid_1's auc: 0.74586 valid_1's binary_logloss: 0.251971 Early stopping, best iteration is: [421] training's auc: 0.910349 training's binary_logloss: 0.19296 valid_1's auc: 0.746778 valid_1's binary_logloss: 0.251573 Fold 2 AUC : 0.746778 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.863944 training's binary_logloss: 0.218271 valid_1's auc: 0.736032 valid_1's binary_logloss: 0.246888 [400] training's auc: 0.905849 training's binary_logloss: 0.196022 valid_1's auc: 0.740528 valid_1's binary_logloss: 0.245304 Early stopping, best iteration is: [340] training's auc: 0.895625 training's binary_logloss: 0.201644 valid_1's auc: 0.741379 valid_1's binary_logloss: 0.245162 Fold 3 AUC : 0.741379 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.868906 training's binary_logloss: 0.21716 valid_1's auc: 0.744629 valid_1's binary_logloss: 0.246811 [400] training's auc: 0.909643 training's binary_logloss: 0.194964 valid_1's auc: 0.746159 valid_1's binary_logloss: 0.245923 Early stopping, best iteration is: [303] training's auc: 0.892462 training's binary_logloss: 0.204365 valid_1's auc: 0.746827 valid_1's binary_logloss: 0.245702 Fold 4 AUC : 0.746827 Full AUC score 0.745167
feature_importance_df = kfold_lightgbm(data_sampling, num_folds=4, stratified=True, debug=True)
Starting LightGBM. Train shape: (30846, 425), test shape: (4780, 425) Training until validation scores don't improve for 200 rounds [200] training's auc: 0.862919 training's binary_logloss: 0.218029 valid_1's auc: 0.749127 valid_1's binary_logloss: 0.248615 [400] training's auc: 0.907815 training's binary_logloss: 0.195258 valid_1's auc: 0.754375 valid_1's binary_logloss: 0.246677 [600] training's auc: 0.933878 training's binary_logloss: 0.179564 valid_1's auc: 0.75204 valid_1's binary_logloss: 0.247327 Early stopping, best iteration is: [423] training's auc: 0.911753 training's binary_logloss: 0.193093 valid_1's auc: 0.754479 valid_1's binary_logloss: 0.246666 Fold 1 AUC : 0.754479 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.868786 training's binary_logloss: 0.215662 valid_1's auc: 0.728946 valid_1's binary_logloss: 0.253133 [400] training's auc: 0.908833 training's binary_logloss: 0.193882 valid_1's auc: 0.730834 valid_1's binary_logloss: 0.252502 Early stopping, best iteration is: [336] training's auc: 0.897974 training's binary_logloss: 0.199893 valid_1's auc: 0.731192 valid_1's binary_logloss: 0.25245 Fold 2 AUC : 0.731192 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.865982 training's binary_logloss: 0.216867 valid_1's auc: 0.732128 valid_1's binary_logloss: 0.251552 [400] training's auc: 0.908711 training's binary_logloss: 0.194518 valid_1's auc: 0.734308 valid_1's binary_logloss: 0.250999 Early stopping, best iteration is: [343] training's auc: 0.898829 training's binary_logloss: 0.199948 valid_1's auc: 0.73482 valid_1's binary_logloss: 0.250765 Fold 3 AUC : 0.734820 Training until validation scores don't improve for 200 rounds [200] training's auc: 0.867075 training's binary_logloss: 0.216943 valid_1's auc: 0.747648 valid_1's binary_logloss: 0.248731 [400] training's auc: 0.909817 training's binary_logloss: 0.194981 valid_1's auc: 0.750619 valid_1's binary_logloss: 0.247275 Early stopping, best iteration is: [388] training's auc: 0.907569 training's binary_logloss: 0.196162 valid_1's auc: 0.750923 valid_1's binary_logloss: 0.247181 Fold 4 AUC : 0.750923 Full AUC score 0.742910
from lightgbm import LGBMClassifier
from sklearn.model_selection import GridSearchCV
from collections import Counter
def train_test_split(_df):
# Divide in training/validation and test data
_train_df = _df[_df['TARGET'].notnull()]
_test_df = _df[_df['TARGET'].isnull()]
print("Train shape: {}, test shape: {}".format(_train_df.shape, _test_df.shape))
counter = Counter(_train_df['TARGET'])
print("Conteur de class", counter)
return _train_df, _test_df
def feats_target_split(_df):
feats = [f for f in _df.columns if f not in ['TARGET', 'SK_ID_CURR', 'SK_ID_BUREAU', 'SK_ID_PREV', 'index']]
_X = _df[feats].copy()
_y = _df['TARGET'].copy()
return _X, _y
def gscv_classifier(_X, _y, _gridParams, _clf):
print("Starting {}".format(_clf.__class__.__name__))
_grid = GridSearchCV(_clf, _gridParams, scoring='roc_auc', verbose=2, cv=4)
# Run the grid
_grid.fit(_X, _y)
# Print the best parameters found
print("Best params:", _grid.best_params_)
print("Best score:", _grid.best_score_)
return _grid
from sklearn.metrics import auc
from sklearn.metrics import RocCurveDisplay, roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
def _plot_classifier(X, y, clf, title=None, folds=5, stratified=True):
print("Ploting {}".format(clf.__class__.__name__))
if stratified:
cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1)
else:
cv = KFold(n_splits=folds, shuffle=True, random_state=1)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
oof_preds = np.zeros(X.shape[0])
feature_importance_df = pd.DataFrame()
fig, axs = plt.subplots(2, figsize=(15, 20))
for i, (train, test) in enumerate(cv.split(X, y)):
train_x, train_y = X.iloc[train], y.iloc[train]
valid_x, valid_y = X.iloc[test], y.iloc[test]
clf.fit(train_x, train_y)
viz = RocCurveDisplay.from_estimator(clf, valid_x, valid_y, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=axs[1])
interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
interp_tpr[0] = 0.0
tprs.append(interp_tpr)
aucs.append(viz.roc_auc)
if isinstance(clf, XGBClassifier):
oof_preds[test] = clf.predict_proba(valid_x, iteration_range=(0, clf.best_iteration))[:, 1]
elif isinstance(clf, LGBMClassifier):
oof_preds[test] = clf.predict_proba(valid_x, num_iteration=clf.best_iteration_)[:, 1]
else:
oof_preds[test] = clf.predict_proba(valid_x)[:, 1]
fold_importance_df = pd.DataFrame()
fold_importance_df["feature"] = X.columns
fold_importance_df["importance"] = clf.feature_importances_
fold_importance_df["fold"] = i + 1
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
print('Fold %2d AUC : %.6f' % (i + 1, roc_auc_score(valid_y, oof_preds[test])))
cols = feature_importance_df[["feature", "importance"]].groupby("feature").mean().sort_values(by="importance", ascending=False)[:20].index
best_features = feature_importance_df.loc[feature_importance_df.feature.isin(cols)]
sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False), order=cols, ax=axs[0])
axs[0].set_title('{} Feature Importance (avg over folds)'.format(clf.__class__.__name__))
axs[1].plot([0, 1], [0, 1], linestyle="--", lw=2, color="r", label="Chance", alpha=0.8)
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
axs[1].plot(mean_fpr, mean_tpr, color="b", label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc), lw=2, alpha=0.8)
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
axs[1].fill_between(mean_fpr, tprs_lower, tprs_upper, color="grey", alpha=0.2, label=r"$\pm$ 1 std. dev.")
axs[1].set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="{} ROC Curve".format(clf.__class__.__name__))
axs[1].legend(loc="lower right")
if title:
plt.suptitle(title, y=1, fontweight ="bold")
fig.tight_layout()
plt.show()
from sklearn.metrics import auc
from sklearn.metrics import RocCurveDisplay, roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
def plot_balanced_classifier(_X, _y, clf, title=None, folds=5, stratified=True, oversample=False, overparams={}, undersample=False, underparams={}):
print("Ploting {}".format(clf.__class__.__name__))
if stratified:
cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1)
else:
cv = KFold(n_splits=folds, shuffle=True, random_state=1)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
oof_preds = np.zeros(_X.shape[0])
feature_importance_df = pd.DataFrame()
fig, axs = plt.subplots(2, figsize=(15, 20))
for i, (train_idx, test_idx) in enumerate(cv.split(X, y)):
_train_x, _train_y = _X.iloc[train_idx], _y.iloc[train_idx]
_valid_x, _valid_y = _X.iloc[test_idx], _y.iloc[test_idx]
if oversample and not undersample:
print("Oversampling...")
over = SMOTE(**overparams)
_train_xx, _train_yy = over.fit_resample(_train_x, _train_y)
elif undersample and not oversample:
print("Undersampling...")
under = RandomUnderSampler(**underparams)
_train_xx, _train_yy = under.fit_resample(_train_x, _train_y)
elif oversample and undersample:
print("Pipeline Oversampling and Undersampling...")
over = SMOTE(**overparams)
under = RandomUnderSampler(**underparams)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)
_train_xx, _train_yy = pipeline.fit_resample(_train_x, _train_y)
else:
_train_xx, _train_yy = _train_x, _train_y
counter = Counter(_train_yy)
print("Conteur de class, fold {}: {}".format(i, counter))
clf.fit(_train_xx, _train_yy)
viz = RocCurveDisplay.from_estimator(clf, _valid_x, _valid_y, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=axs[1])
interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
interp_tpr[0] = 0.0
tprs.append(interp_tpr)
aucs.append(viz.roc_auc)
if isinstance(clf, XGBClassifier):
oof_preds[test_idx] = clf.predict_proba(_valid_x, iteration_range=(0, clf.best_iteration))[:, 1]
elif isinstance(clf, LGBMClassifier):
oof_preds[test_idx] = clf.predict_proba(_valid_x, num_iteration=clf.best_iteration_)[:, 1]
else:
oof_preds[test_idx] = clf.predict_proba(_valid_x)[:, 1]
fold_importance_df = pd.DataFrame()
fold_importance_df["feature"] = X.columns
fold_importance_df["importance"] = clf.feature_importances_
fold_importance_df["fold"] = i + 1
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
print('Fold %2d AUC : %.6f' % (i + 1, roc_auc_score(_valid_y, oof_preds[test_idx])))
cols = feature_importance_df[["feature", "importance"]].groupby("feature").mean().sort_values(by="importance", ascending=False)[:20].index
best_features = feature_importance_df.loc[feature_importance_df.feature.isin(cols)]
sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False), order=cols, ax=axs[0])
axs[0].set_title('{} Feature Importance (avg over folds)'.format(clf.__class__.__name__))
axs[1].plot([0, 1], [0, 1], linestyle="--", lw=2, color="r", label="Chance", alpha=0.8)
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
axs[1].plot(mean_fpr, mean_tpr, color="b", label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc), lw=2, alpha=0.8)
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
axs[1].fill_between(mean_fpr, tprs_lower, tprs_upper, color="grey", alpha=0.2, label=r"$\pm$ 1 std. dev.")
axs[1].set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="{} ROC Curve".format(clf.__class__.__name__))
axs[1].legend(loc="lower right")
if title:
plt.suptitle(title, y=1, fontweight ="bold")
fig.tight_layout()
plt.show()
return clf
from lightgbm import LGBMClassifier
gridParams = {
'num_leaves': [8, 16, 34], # Shall be smaller than 2^(max_depth)
'max_depth': [8],
'min_child_samples': [100, 1000],
'learning_rate': [0.01, 0.05, 0.1],
'colsample_bytree': [0.1, 0.3, 0.5], # Alias for feature_fraction
}
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
lgbm_grid = gscv_classifier(X, y, gridParams, clf)
_ = plot_balanced_classifier(X, y, lgbm_grid.best_estimator_, "LightGBM without balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting LGBMClassifier
Fitting 4 folds for each of 54 candidates, totalling 216 fits
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.9s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 2.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.9s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.9s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.8s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
Best params: {'colsample_bytree': 0.3, 'learning_rate': 0.05, 'max_depth': 8, 'min_child_samples': 1000, 'num_leaves': 34}
Best score: 0.7399814473499138
Ploting LGBMClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.757400
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.747618
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.731920
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.738462
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.740896
from lightgbm import LGBMClassifier
gridParams = {
'num_leaves': [8, 16, 34], # Shall be smaller than 2^(max_depth)
'max_depth': [8],
'min_child_samples': [100, 1000],
'learning_rate': [0.01, 0.05, 0.1],
'colsample_bytree': [0.1, 0.3, 0.5], # Alias for feature_fraction
}
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
lgbm_grid = gscv_classifier(X, y, gridParams, clf)
plot_balanced_classifier(X, y, lgbm_grid.best_estimator_, "LightGBM with balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting LGBMClassifier
Fitting 4 folds for each of 54 candidates, totalling 216 fits
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 0.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 0.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.5s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 0.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.8s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.9s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.8s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.01, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.8s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.05, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.0s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.5s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=16; total time= 1.4s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=100, num_leaves=34; total time= 1.6s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=8; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.3s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=16; total time= 1.1s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.7s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.2s
[CV] END colsample_bytree=0.5, learning_rate=0.1, max_depth=8, min_child_samples=1000, num_leaves=34; total time= 1.3s
Best params: {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 8, 'min_child_samples': 1000, 'num_leaves': 16}
Best score: 0.7390624475743153
Ploting LGBMClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.754718
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.743641
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.727650
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.750862
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.730055
LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)from xgboost import XGBClassifier
gridParams = {
'learning_rate': [0.01, 0.1],
'max_depth': [4, 6],
'subsample': [0.6, 0.9],
'colsample_bytree': [0.1, 0.3],
'n_estimators': [400, 800]
}
# Initiate classifier to use
clf = XGBClassifier(
verbosity=0,
objective='binary:logistic',
nthread=8,
use_label_encoder=False,
error_score='raise',
)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
xgb_grid = gscv_classifier(X, y, gridParams, clf)
plot_balanced_classifier(X, y, xgb_grid.best_estimator_, "XGBoost without balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting XGBClassifier
Fitting 4 folds for each of 32 candidates, totalling 128 fits
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 10.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.3s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.3s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.3s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 23.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 23.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 23.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 8.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.3s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.2s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.2s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.3s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.3s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.5s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.2s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.5s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 23.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.5s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 20.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 16.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 31.6s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 33.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 33.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 34.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 32.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 22.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 23.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 23.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 22.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 22.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 22.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 22.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 43.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 42.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 47.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 45.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 44.6s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 44.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 44.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 43.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 16.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 16.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 15.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 29.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 29.6s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 29.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 30.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 29.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 29.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 29.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 29.3s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 20.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 22.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 20.6s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 41.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 43.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 41.9s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 46.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 44.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 44.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 46.6s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 45.1s
Best params: {'colsample_bytree': 0.3, 'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 800, 'subsample': 0.6}
Best score: 0.743837713501315
Ploting XGBClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.759556
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.743982
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.732931
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.743384
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.747262
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=0.3,
enable_categorical=False, error_score='raise', gamma=0, gpu_id=-1,
importance_type=None, interaction_constraints='',
learning_rate=0.01, max_delta_step=0, max_depth=6,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=800, n_jobs=8, nthread=8, num_parallel_tree=1,
predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1,
scale_pos_weight=1, subsample=0.6, tree_method='exact',
use_label_encoder=False, validate_parameters=1, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=0.3,
enable_categorical=False, error_score='raise', gamma=0, gpu_id=-1,
importance_type=None, interaction_constraints='',
learning_rate=0.01, max_delta_step=0, max_depth=6,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=800, n_jobs=8, nthread=8, num_parallel_tree=1,
predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1,
scale_pos_weight=1, subsample=0.6, tree_method='exact',
use_label_encoder=False, validate_parameters=1, ...)from xgboost import XGBClassifier
gridParams = {
'learning_rate': [0.01, 0.1],
'max_depth': [4, 6],
'subsample': [0.6, 0.9],
'colsample_bytree': [0.1, 0.3],
'n_estimators': [400, 800]
}
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
positive_class = y.sum()
negatvie_class = len(y) - positive_class
scale_pos_weight = negatvie_class / positive_class
# Initiate classifier to use
clf = XGBClassifier(
verbosity=0,
objective='binary:logistic',
nthread=8,
use_label_encoder=False,
error_score='raise',
scale_pos_weight=scale_pos_weight,
)
xgb_grid = gscv_classifier(X, y, gridParams, clf)
plot_balanced_classifier(X, y, xgb_grid.best_estimator_, "XGBoost with balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting XGBClassifier
Fitting 4 folds for each of 32 candidates, totalling 128 fits
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 10.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 9.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.0s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.3s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 18.3s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.7s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 12.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.5s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 12.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 12.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 12.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.8s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.2s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.6s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.9s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 23.1s
[CV] END colsample_bytree=0.1, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 23.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 8.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 9.2s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 8.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 17.2s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 16.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.3s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.0s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 17.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 16.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.4s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 11.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.6s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 11.7s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.5s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.8s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 21.9s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.1s
[CV] END colsample_bytree=0.1, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 22.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.6s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.6; total time= 16.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 16.8s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 32.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 31.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 33.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.6; total time= 33.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 33.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 31.3s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 31.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=4, n_estimators=800, subsample=0.9; total time= 32.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 22.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 21.6s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 21.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.6; total time= 21.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.5s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=400, subsample=0.9; total time= 21.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 42.7s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 42.6s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 41.4s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.6; total time= 40.2s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 39.9s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 41.0s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 40.1s
[CV] END colsample_bytree=0.3, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.9; total time= 39.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.6; total time= 14.4s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 15.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.1s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=400, subsample=0.9; total time= 14.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 27.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.6; total time= 28.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 27.6s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 27.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 27.3s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=4, n_estimators=800, subsample=0.9; total time= 30.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 20.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 19.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.6; total time= 19.3s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 19.5s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 19.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 20.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=400, subsample=0.9; total time= 19.3s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 38.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 38.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 40.8s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.6; total time= 39.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 38.0s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 37.7s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 38.2s
[CV] END colsample_bytree=0.3, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.9; total time= 37.9s
Best params: {'colsample_bytree': 0.3, 'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 800, 'subsample': 0.6}
Best score: 0.7401563916288427
Ploting XGBClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.752596
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.743611
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.732443
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.737996
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.740938
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=0.3,
enable_categorical=False, error_score='raise', gamma=0, gpu_id=-1,
importance_type=None, interaction_constraints='',
learning_rate=0.01, max_delta_step=0, max_depth=4,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=800, n_jobs=8, nthread=8, num_parallel_tree=1,
predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1,
scale_pos_weight=11.503445480340494, subsample=0.6,
tree_method='exact', use_label_encoder=False,
validate_parameters=1, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=0.3,
enable_categorical=False, error_score='raise', gamma=0, gpu_id=-1,
importance_type=None, interaction_constraints='',
learning_rate=0.01, max_delta_step=0, max_depth=4,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=800, n_jobs=8, nthread=8, num_parallel_tree=1,
predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1,
scale_pos_weight=11.503445480340494, subsample=0.6,
tree_method='exact', use_label_encoder=False,
validate_parameters=1, ...)from sklearn.ensemble import RandomForestClassifier
gridParams = {
'n_estimators': [100, 400, 700],
'max_depth': [3, 4, 5, 6],
'max_samples': [0.5, 1],
}
# Initiate classifier to use
clf = RandomForestClassifier(
n_jobs=8,
verbose=0,
)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
rfc_grid = gscv_classifier(X, y, gridParams, clf)
plot_balanced_classifier(X, y, rfc_grid.best_estimator_, "RandomForest without balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting RandomForestClassifier
Fitting 4 folds for each of 24 candidates, totalling 96 fits
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 23.4s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 0.8s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 0.8s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 0.7s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.2s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.1s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.1s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.2s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.4s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.4s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.4s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.7s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.2s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 0.9s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 0.9s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 0.9s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.8s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.5s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.6s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.6s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.3s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.2s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.1s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.2s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.2s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.5s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.0s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 4.8s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.0s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 4.9s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.5s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.4s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.8s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.5s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 5.5s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 5.6s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 5.7s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 5.7s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.7s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.2s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.2s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.3s
Best params: {'max_depth': 6, 'max_samples': 0.5, 'n_estimators': 700}
Best score: 0.7092335672546197
Ploting RandomForestClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.723066
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.716841
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.696013
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.707200
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.713347
RandomForestClassifier(max_depth=6, max_samples=0.5, n_estimators=700, n_jobs=8)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(max_depth=6, max_samples=0.5, n_estimators=700, n_jobs=8)
from sklearn.ensemble import RandomForestClassifier
gridParams = {
'n_estimators': [100, 400, 700],
'max_depth': [3, 4, 5, 6],
'max_samples': [0.5, 1],
}
# Initiate classifier to use
clf = RandomForestClassifier(
n_jobs=8,
class_weight='balanced',
verbose=0,
)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
rfc_grid = gscv_classifier(X, y, gridParams, clf)
plot_balanced_classifier(X, y, rfc_grid.best_estimator_, "RandomForest with balanced class weight")
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Starting RandomForestClassifier
Fitting 4 folds for each of 24 candidates, totalling 96 fits
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 0.9s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.3s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.2s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.3s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=400; total time= 2.3s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.9s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.5s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.5s
[CV] END .....max_depth=3, max_samples=0.5, n_estimators=700; total time= 3.5s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=3, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 1.1s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 1.0s
[CV] END .......max_depth=3, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=3, max_samples=1, n_estimators=700; total time= 1.5s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=100; total time= 1.0s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 3.4s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.7s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.7s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=400; total time= 2.9s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.3s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.4s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.6s
[CV] END .....max_depth=4, max_samples=0.5, n_estimators=700; total time= 4.5s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=4, max_samples=1, n_estimators=400; total time= 1.1s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.6s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.5s
[CV] END .......max_depth=4, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=100; total time= 1.1s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.2s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.3s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=400; total time= 3.4s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.3s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.4s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.6s
[CV] END .....max_depth=5, max_samples=0.5, n_estimators=700; total time= 5.3s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=5, max_samples=1, n_estimators=400; total time= 1.1s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 2.1s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.5s
[CV] END .......max_depth=5, max_samples=1, n_estimators=700; total time= 1.6s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.2s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=100; total time= 1.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.9s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.6s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.6s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=400; total time= 3.7s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 6.2s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 6.3s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 6.2s
[CV] END .....max_depth=6, max_samples=0.5, n_estimators=700; total time= 6.0s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.5s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=100; total time= 0.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.9s
[CV] END .......max_depth=6, max_samples=1, n_estimators=400; total time= 0.8s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.3s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.6s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.4s
[CV] END .......max_depth=6, max_samples=1, n_estimators=700; total time= 1.3s
Best params: {'max_depth': 5, 'max_samples': 0.5, 'n_estimators': 400}
Best score: 0.7154566051827315
Ploting RandomForestClassifier
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 1973})
Fold 1 AUC : 0.728331
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 1974})
Fold 2 AUC : 0.715927
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 1974})
Fold 3 AUC : 0.703136
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 1974})
Fold 4 AUC : 0.709835
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 1973})
Fold 5 AUC : 0.721471
RandomForestClassifier(class_weight='balanced', max_depth=5, max_samples=0.5,
n_estimators=400, n_jobs=8)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomForestClassifier(class_weight='balanced', max_depth=5, max_samples=0.5,
n_estimators=400, n_jobs=8)import imblearn
print(imblearn.__version__)
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
0.7.0
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
from collections import Counter
from imblearn.over_sampling import SMOTE
# summarize class distribution
counter = Counter(y)
print("Before resampling:", counter)
print("Ratio of positive class: {}".format(counter[1] / len(y)))
# transform the dataset
over = SMOTE()
# transform the dataset
oX, oy = over.fit_resample(X, y)
# summarize the new class distribution
counter = Counter(oy)
print(counter)
Before resampling: Counter({0.0: 28379, 1.0: 2467})
Ratio of positive class: 0.07997795500226934
Counter({0.0: 28379, 1.0: 28379})
from lightgbm import LGBMClassifier
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
plot_balanced_classifier(X, y, clf, "LightGBM with oversampling", oversample=True)
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Ploting LGBMClassifier
Oversampling...
Conteur de class, fold 0: Counter({0.0: 22703, 1.0: 22703})
Fold 1 AUC : 0.719533
Oversampling...
Conteur de class, fold 1: Counter({0.0: 22703, 1.0: 22703})
Fold 2 AUC : 0.695094
Oversampling...
Conteur de class, fold 2: Counter({0.0: 22703, 1.0: 22703})
Fold 3 AUC : 0.705016
Oversampling...
Conteur de class, fold 3: Counter({0.0: 22703, 1.0: 22703})
Fold 4 AUC : 0.718867
Oversampling...
Conteur de class, fold 4: Counter({0.0: 22704, 1.0: 22704})
Fold 5 AUC : 0.711065
LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)import imblearn
print(imblearn.__version__)
train_df, _ = train_test_split(data)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
0.7.0
Train shape: (307511, 425), test shape: (48744, 425)
Conteur de class Counter({0.0: 282686, 1.0: 24825})
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler
# summarize class distribution
counter = Counter(y)
print("Before resampling:", counter)
print("Ratio of positive class: {}".format(counter[1] / len(y)))
# transform the dataset
under = RandomUnderSampler()
# transform the dataset
X, y = under.fit_resample(X, y)
# summarize the new class distribution
counter = Counter(y)
print(counter)
Before resampling: Counter({0.0: 282686, 1.0: 24825})
Ratio of positive class: 0.08072881945686496
Counter({0.0: 24825, 1.0: 24825})
from lightgbm import LGBMClassifier
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
plot_balanced_classifier(X, y, clf, "LightGBM with undersampling", undersample=True)
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Ploting LGBMClassifier
Undersampling...
Conteur de class, fold 0: Counter({0.0: 1973, 1.0: 1973})
Fold 1 AUC : 0.734981
Undersampling...
Conteur de class, fold 1: Counter({0.0: 1974, 1.0: 1974})
Fold 2 AUC : 0.717625
Undersampling...
Conteur de class, fold 2: Counter({0.0: 1974, 1.0: 1974})
Fold 3 AUC : 0.707004
Undersampling...
Conteur de class, fold 3: Counter({0.0: 1974, 1.0: 1974})
Fold 4 AUC : 0.725478
Undersampling...
Conteur de class, fold 4: Counter({0.0: 1973, 1.0: 1973})
Fold 5 AUC : 0.719662
LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)import imblearn
print(imblearn.__version__)
train_df, _ = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
0.7.0
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
from collections import Counter
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
# summarize class distribution
counter = Counter(y)
print("Before resampling:", counter)
print("Ratio of positive class: {}".format(counter[1] / len(y)))
# transform the dataset
over = SMOTE(sampling_strategy=0.1)
under = RandomUnderSampler(sampling_strategy=0.5)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)
# transform the dataset
X, y = pipeline.fit_resample(X, y)
# summarize the new class distribution
counter = Counter(y)
print(counter)
Before resampling: Counter({0.0: 28379, 1.0: 2467})
Ratio of positive class: 0.07997795500226934
Counter({0.0: 5674, 1.0: 2837})
from lightgbm import LGBMClassifier
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X.fillna(X.median(), inplace=True)
plot_balanced_classifier(
X,
y,
clf,
"LightGBM with under/over-sampling",
undersample=True,
oversample=True,
underparams={'sampling_strategy':0.5},
overparams={'sampling_strategy':0.1}
)
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
Ploting LGBMClassifier
Pipeline Oversampling and Undersampling...
Conteur de class, fold 0: Counter({0.0: 4540, 1.0: 2270})
Fold 1 AUC : 0.744904
Pipeline Oversampling and Undersampling...
Conteur de class, fold 1: Counter({0.0: 4540, 1.0: 2270})
Fold 2 AUC : 0.728864
Pipeline Oversampling and Undersampling...
Conteur de class, fold 2: Counter({0.0: 4540, 1.0: 2270})
Fold 3 AUC : 0.722399
Pipeline Oversampling and Undersampling...
Conteur de class, fold 3: Counter({0.0: 4540, 1.0: 2270})
Fold 4 AUC : 0.736994
Pipeline Oversampling and Undersampling...
Conteur de class, fold 4: Counter({0.0: 4540, 1.0: 2270})
Fold 5 AUC : 0.728496
LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)comment sortir un modèle entrainé à partir des folds ?
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X_test, y_test = feats_target_split(test_df)
X.fillna(X.median(), inplace=True)
X_test.fillna(X_test.median(), inplace=True)
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
from lime import lime_tabular
from lightgbm import LGBMClassifier
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
clf.fit(X, y, eval_set=[(X, y)], eval_metric='auc', early_stopping_rounds=10)
explainer = lime_tabular.LimeTabularExplainer(
X.values,
mode="classification",
class_names=["0", "1"],
feature_names=X.columns.to_list(),
)
idx = random.randint(1, len(X_test))
explanation = explainer.explain_instance(X_test.iloc[idx], clf.predict_proba, num_features=len(X.columns))
explanation.show_in_notebook()
[1] training's auc: 0.656564 training's binary_logloss: 0.284852 [2] training's auc: 0.688334 training's binary_logloss: 0.302254 [3] training's auc: 0.728079 training's binary_logloss: 0.318197 [4] training's auc: 0.742795 training's binary_logloss: 0.335407 [5] training's auc: 0.750071 training's binary_logloss: 0.352901 [6] training's auc: 0.755264 training's binary_logloss: 0.369976 [7] training's auc: 0.758383 training's binary_logloss: 0.386172 [8] training's auc: 0.762716 training's binary_logloss: 0.402352 [9] training's auc: 0.765007 training's binary_logloss: 0.416585 [10] training's auc: 0.767662 training's binary_logloss: 0.429761 [11] training's auc: 0.768917 training's binary_logloss: 0.442517 [12] training's auc: 0.771207 training's binary_logloss: 0.45399 [13] training's auc: 0.773099 training's binary_logloss: 0.464611 [14] training's auc: 0.77497 training's binary_logloss: 0.473975 [15] training's auc: 0.777909 training's binary_logloss: 0.482927 [16] training's auc: 0.780359 training's binary_logloss: 0.49094 [17] training's auc: 0.783231 training's binary_logloss: 0.498095 [18] training's auc: 0.785193 training's binary_logloss: 0.504342 [19] training's auc: 0.787953 training's binary_logloss: 0.509577 [20] training's auc: 0.789888 training's binary_logloss: 0.514736 [21] training's auc: 0.790688 training's binary_logloss: 0.51927 [22] training's auc: 0.792694 training's binary_logloss: 0.522947 [23] training's auc: 0.79387 training's binary_logloss: 0.526251 [24] training's auc: 0.794977 training's binary_logloss: 0.529357 [25] training's auc: 0.796639 training's binary_logloss: 0.532183 [26] training's auc: 0.79839 training's binary_logloss: 0.534443 [27] training's auc: 0.800353 training's binary_logloss: 0.536274 [28] training's auc: 0.80164 training's binary_logloss: 0.537989 [29] training's auc: 0.803062 training's binary_logloss: 0.539332 [30] training's auc: 0.804005 training's binary_logloss: 0.540502 [31] training's auc: 0.804856 training's binary_logloss: 0.541432 [32] training's auc: 0.805929 training's binary_logloss: 0.54211 [33] training's auc: 0.807423 training's binary_logloss: 0.542511 [34] training's auc: 0.808132 training's binary_logloss: 0.542973 [35] training's auc: 0.809185 training's binary_logloss: 0.543211 [36] training's auc: 0.810041 training's binary_logloss: 0.54337 [37] training's auc: 0.811658 training's binary_logloss: 0.543183 [38] training's auc: 0.812664 training's binary_logloss: 0.543266 [39] training's auc: 0.813886 training's binary_logloss: 0.543039 [40] training's auc: 0.81498 training's binary_logloss: 0.542707 [41] training's auc: 0.816247 training's binary_logloss: 0.542364 [42] training's auc: 0.817277 training's binary_logloss: 0.542048 [43] training's auc: 0.817942 training's binary_logloss: 0.541588 [44] training's auc: 0.818958 training's binary_logloss: 0.541065 [45] training's auc: 0.820182 training's binary_logloss: 0.540341 [46] training's auc: 0.821349 training's binary_logloss: 0.539638 [47] training's auc: 0.822321 training's binary_logloss: 0.539236 [48] training's auc: 0.823113 training's binary_logloss: 0.538717 [49] training's auc: 0.823987 training's binary_logloss: 0.537926 [50] training's auc: 0.825181 training's binary_logloss: 0.5372 [51] training's auc: 0.825858 training's binary_logloss: 0.536425 [52] training's auc: 0.82695 training's binary_logloss: 0.535682 [53] training's auc: 0.827954 training's binary_logloss: 0.534648 [54] training's auc: 0.828947 training's binary_logloss: 0.533798 [55] training's auc: 0.830061 training's binary_logloss: 0.532843 [56] training's auc: 0.830547 training's binary_logloss: 0.532163 [57] training's auc: 0.831424 training's binary_logloss: 0.531308 [58] training's auc: 0.832662 training's binary_logloss: 0.530164 [59] training's auc: 0.833343 training's binary_logloss: 0.529481 [60] training's auc: 0.834185 training's binary_logloss: 0.528634 [61] training's auc: 0.835042 training's binary_logloss: 0.527733 [62] training's auc: 0.836457 training's binary_logloss: 0.526536 [63] training's auc: 0.837284 training's binary_logloss: 0.525765 [64] training's auc: 0.838172 training's binary_logloss: 0.52486 [65] training's auc: 0.839109 training's binary_logloss: 0.52405 [66] training's auc: 0.840048 training's binary_logloss: 0.523147 [67] training's auc: 0.840672 training's binary_logloss: 0.522322 [68] training's auc: 0.841547 training's binary_logloss: 0.521412 [69] training's auc: 0.842452 training's binary_logloss: 0.520488 [70] training's auc: 0.843243 training's binary_logloss: 0.519572 [71] training's auc: 0.843589 training's binary_logloss: 0.518952 [72] training's auc: 0.844457 training's binary_logloss: 0.518109 [73] training's auc: 0.845404 training's binary_logloss: 0.517161 [74] training's auc: 0.846149 training's binary_logloss: 0.516298 [75] training's auc: 0.847158 training's binary_logloss: 0.515343 [76] training's auc: 0.847909 training's binary_logloss: 0.514555 [77] training's auc: 0.848792 training's binary_logloss: 0.513656 [78] training's auc: 0.849698 training's binary_logloss: 0.512762 [79] training's auc: 0.850395 training's binary_logloss: 0.511906 [80] training's auc: 0.851204 training's binary_logloss: 0.51096 [81] training's auc: 0.851926 training's binary_logloss: 0.510069 [82] training's auc: 0.852584 training's binary_logloss: 0.509186 [83] training's auc: 0.853022 training's binary_logloss: 0.508719 [84] training's auc: 0.85364 training's binary_logloss: 0.507918 [85] training's auc: 0.854563 training's binary_logloss: 0.507031 [86] training's auc: 0.855288 training's binary_logloss: 0.506195 [87] training's auc: 0.856089 training's binary_logloss: 0.505316 [88] training's auc: 0.856777 training's binary_logloss: 0.50449 [89] training's auc: 0.857656 training's binary_logloss: 0.503528 [90] training's auc: 0.858267 training's binary_logloss: 0.502989 [91] training's auc: 0.859027 training's binary_logloss: 0.502142 [92] training's auc: 0.859978 training's binary_logloss: 0.50127 [93] training's auc: 0.860731 training's binary_logloss: 0.5004 [94] training's auc: 0.861708 training's binary_logloss: 0.499515 [95] training's auc: 0.862075 training's binary_logloss: 0.499073 [96] training's auc: 0.862918 training's binary_logloss: 0.498172 [97] training's auc: 0.863562 training's binary_logloss: 0.497374 [98] training's auc: 0.864385 training's binary_logloss: 0.496486 [99] training's auc: 0.86492 training's binary_logloss: 0.495818 [100] training's auc: 0.8658 training's binary_logloss: 0.494867
import shap
# print the JS visualization code to the notebook
shap.initjs()
train_df, test_df = train_test_split(data_sampling)
X, y = feats_target_split(train_df)
X_test, y_test = feats_target_split(test_df)
X.fillna(X.median(), inplace=True)
X_test.fillna(X_test.median(), inplace=True)
Train shape: (30846, 425), test shape: (4780, 425)
Conteur de class Counter({0.0: 28379, 1.0: 2467})
from lightgbm import LGBMClassifier
import shap
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
clf.fit(X, y, eval_set=[(X, y)], eval_metric='auc', early_stopping_rounds=10)
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(X)
sv = np.array(shap_values)
y = clf.predict(X).astype("bool")
sv_positive = sv[:,y,:]
sv_negative = sv[:,~y,:]
[1] training's auc: 0.656564 training's binary_logloss: 0.284852 [2] training's auc: 0.688334 training's binary_logloss: 0.302254 [3] training's auc: 0.728079 training's binary_logloss: 0.318197 [4] training's auc: 0.742795 training's binary_logloss: 0.335407 [5] training's auc: 0.750071 training's binary_logloss: 0.352901 [6] training's auc: 0.755264 training's binary_logloss: 0.369976 [7] training's auc: 0.758383 training's binary_logloss: 0.386172 [8] training's auc: 0.762716 training's binary_logloss: 0.402352 [9] training's auc: 0.765007 training's binary_logloss: 0.416585 [10] training's auc: 0.767662 training's binary_logloss: 0.429761 [11] training's auc: 0.768917 training's binary_logloss: 0.442517 [12] training's auc: 0.771207 training's binary_logloss: 0.45399 [13] training's auc: 0.773099 training's binary_logloss: 0.464611 [14] training's auc: 0.77497 training's binary_logloss: 0.473975 [15] training's auc: 0.777909 training's binary_logloss: 0.482927 [16] training's auc: 0.780359 training's binary_logloss: 0.49094 [17] training's auc: 0.783231 training's binary_logloss: 0.498095 [18] training's auc: 0.785193 training's binary_logloss: 0.504342 [19] training's auc: 0.787953 training's binary_logloss: 0.509577 [20] training's auc: 0.789888 training's binary_logloss: 0.514736 [21] training's auc: 0.790688 training's binary_logloss: 0.51927 [22] training's auc: 0.792694 training's binary_logloss: 0.522947 [23] training's auc: 0.79387 training's binary_logloss: 0.526251 [24] training's auc: 0.794977 training's binary_logloss: 0.529357 [25] training's auc: 0.796639 training's binary_logloss: 0.532183 [26] training's auc: 0.79839 training's binary_logloss: 0.534443 [27] training's auc: 0.800353 training's binary_logloss: 0.536274 [28] training's auc: 0.80164 training's binary_logloss: 0.537989 [29] training's auc: 0.803062 training's binary_logloss: 0.539332 [30] training's auc: 0.804005 training's binary_logloss: 0.540502 [31] training's auc: 0.804856 training's binary_logloss: 0.541432 [32] training's auc: 0.805929 training's binary_logloss: 0.54211 [33] training's auc: 0.807423 training's binary_logloss: 0.542511 [34] training's auc: 0.808132 training's binary_logloss: 0.542973 [35] training's auc: 0.809185 training's binary_logloss: 0.543211 [36] training's auc: 0.810041 training's binary_logloss: 0.54337 [37] training's auc: 0.811658 training's binary_logloss: 0.543183 [38] training's auc: 0.812664 training's binary_logloss: 0.543266 [39] training's auc: 0.813886 training's binary_logloss: 0.543039 [40] training's auc: 0.81498 training's binary_logloss: 0.542707 [41] training's auc: 0.816247 training's binary_logloss: 0.542364 [42] training's auc: 0.817277 training's binary_logloss: 0.542048 [43] training's auc: 0.817942 training's binary_logloss: 0.541588 [44] training's auc: 0.818958 training's binary_logloss: 0.541065 [45] training's auc: 0.820182 training's binary_logloss: 0.540341 [46] training's auc: 0.821349 training's binary_logloss: 0.539638 [47] training's auc: 0.822321 training's binary_logloss: 0.539236 [48] training's auc: 0.823113 training's binary_logloss: 0.538717 [49] training's auc: 0.823987 training's binary_logloss: 0.537926 [50] training's auc: 0.825181 training's binary_logloss: 0.5372 [51] training's auc: 0.825858 training's binary_logloss: 0.536425 [52] training's auc: 0.82695 training's binary_logloss: 0.535682 [53] training's auc: 0.827954 training's binary_logloss: 0.534648 [54] training's auc: 0.828947 training's binary_logloss: 0.533798 [55] training's auc: 0.830061 training's binary_logloss: 0.532843 [56] training's auc: 0.830547 training's binary_logloss: 0.532163 [57] training's auc: 0.831424 training's binary_logloss: 0.531308 [58] training's auc: 0.832662 training's binary_logloss: 0.530164 [59] training's auc: 0.833343 training's binary_logloss: 0.529481 [60] training's auc: 0.834185 training's binary_logloss: 0.528634 [61] training's auc: 0.835042 training's binary_logloss: 0.527733 [62] training's auc: 0.836457 training's binary_logloss: 0.526536 [63] training's auc: 0.837284 training's binary_logloss: 0.525765 [64] training's auc: 0.838172 training's binary_logloss: 0.52486 [65] training's auc: 0.839109 training's binary_logloss: 0.52405 [66] training's auc: 0.840048 training's binary_logloss: 0.523147 [67] training's auc: 0.840672 training's binary_logloss: 0.522322 [68] training's auc: 0.841547 training's binary_logloss: 0.521412 [69] training's auc: 0.842452 training's binary_logloss: 0.520488 [70] training's auc: 0.843243 training's binary_logloss: 0.519572 [71] training's auc: 0.843589 training's binary_logloss: 0.518952 [72] training's auc: 0.844457 training's binary_logloss: 0.518109 [73] training's auc: 0.845404 training's binary_logloss: 0.517161 [74] training's auc: 0.846149 training's binary_logloss: 0.516298 [75] training's auc: 0.847158 training's binary_logloss: 0.515343 [76] training's auc: 0.847909 training's binary_logloss: 0.514555 [77] training's auc: 0.848792 training's binary_logloss: 0.513656 [78] training's auc: 0.849698 training's binary_logloss: 0.512762 [79] training's auc: 0.850395 training's binary_logloss: 0.511906 [80] training's auc: 0.851204 training's binary_logloss: 0.51096 [81] training's auc: 0.851926 training's binary_logloss: 0.510069 [82] training's auc: 0.852584 training's binary_logloss: 0.509186 [83] training's auc: 0.853022 training's binary_logloss: 0.508719 [84] training's auc: 0.85364 training's binary_logloss: 0.507918 [85] training's auc: 0.854563 training's binary_logloss: 0.507031 [86] training's auc: 0.855288 training's binary_logloss: 0.506195 [87] training's auc: 0.856089 training's binary_logloss: 0.505316 [88] training's auc: 0.856777 training's binary_logloss: 0.50449 [89] training's auc: 0.857656 training's binary_logloss: 0.503528 [90] training's auc: 0.858267 training's binary_logloss: 0.502989 [91] training's auc: 0.859027 training's binary_logloss: 0.502142 [92] training's auc: 0.859978 training's binary_logloss: 0.50127 [93] training's auc: 0.860731 training's binary_logloss: 0.5004 [94] training's auc: 0.861708 training's binary_logloss: 0.499515 [95] training's auc: 0.862075 training's binary_logloss: 0.499073 [96] training's auc: 0.862918 training's binary_logloss: 0.498172 [97] training's auc: 0.863562 training's binary_logloss: 0.497374 [98] training's auc: 0.864385 training's binary_logloss: 0.496486 [99] training's auc: 0.86492 training's binary_logloss: 0.495818 [100] training's auc: 0.8658 training's binary_logloss: 0.494867
shap.summary_plot(shap_values[1], X.astype("float"))
shap.dependence_plot("EXT_SOURCE_2", shap_values[1], X)
shap.dependence_plot("CODE_GENDER", shap_values[1], X)
shap.dependence_plot("DAYS_BIRTH", shap_values[1], X)
from sklearn.model_selection import train_test_split as sk_train_test_split
train_df, test_df = train_test_split(data)
X, y = feats_target_split(train_df)
X_train, X_test, y_train, y_test = sk_train_test_split(X, y)
Train shape: (307511, 425), test shape: (48744, 425)
Conteur de class Counter({0.0: 282686, 1.0: 24825})
# Minimiser les faux négatifs
from lightgbm import LGBMClassifier
# Initiate classifier to use
clf = LGBMClassifier(
objective='binary',
n_jobs=8,
verbose=-1,
force_row_wise=True,
is_unbalance=True, # try to Automatically balance the weight of the dominated labels
num_leaves=16, # Shall be smaller than 2^(max_depth)
max_depth=8,
min_child_samples=1000,
learning_rate=0.1,
colsample_bytree=0.5, # Alias for feature_fraction
)
# X_train.fillna(X_train.median(), inplace=True)
clf.fit(X_train, y_train, eval_set=[(X_train, y_train)], eval_metric='auc', early_stopping_rounds=10)
[1] training's auc: 0.63919 training's binary_logloss: 0.28723 [2] training's auc: 0.696242 training's binary_logloss: 0.302327 [3] training's auc: 0.708291 training's binary_logloss: 0.32089 [4] training's auc: 0.717796 training's binary_logloss: 0.341245 [5] training's auc: 0.721938 training's binary_logloss: 0.360294 [6] training's auc: 0.723682 training's binary_logloss: 0.378874 [7] training's auc: 0.72552 training's binary_logloss: 0.396688 [8] training's auc: 0.728493 training's binary_logloss: 0.414097 [9] training's auc: 0.72961 training's binary_logloss: 0.429746 [10] training's auc: 0.731878 training's binary_logloss: 0.44479 [11] training's auc: 0.734915 training's binary_logloss: 0.458422 [12] training's auc: 0.736239 training's binary_logloss: 0.471378 [13] training's auc: 0.737125 training's binary_logloss: 0.482759 [14] training's auc: 0.737768 training's binary_logloss: 0.493301 [15] training's auc: 0.739295 training's binary_logloss: 0.503274 [16] training's auc: 0.739661 training's binary_logloss: 0.512071 [17] training's auc: 0.741247 training's binary_logloss: 0.520292 [18] training's auc: 0.741786 training's binary_logloss: 0.52756 [19] training's auc: 0.742639 training's binary_logloss: 0.534071 [20] training's auc: 0.742958 training's binary_logloss: 0.540166 [21] training's auc: 0.743664 training's binary_logloss: 0.545591 [22] training's auc: 0.744574 training's binary_logloss: 0.55043 [23] training's auc: 0.745367 training's binary_logloss: 0.554765 [24] training's auc: 0.746243 training's binary_logloss: 0.558735 [25] training's auc: 0.746828 training's binary_logloss: 0.562303 [26] training's auc: 0.747931 training's binary_logloss: 0.565409 [27] training's auc: 0.748314 training's binary_logloss: 0.568307 [28] training's auc: 0.749331 training's binary_logloss: 0.570763 [29] training's auc: 0.750022 training's binary_logloss: 0.572904 [30] training's auc: 0.750785 training's binary_logloss: 0.574908 [31] training's auc: 0.751522 training's binary_logloss: 0.576542 [32] training's auc: 0.75248 training's binary_logloss: 0.577889 [33] training's auc: 0.753101 training's binary_logloss: 0.579207 [34] training's auc: 0.753965 training's binary_logloss: 0.580317 [35] training's auc: 0.754494 training's binary_logloss: 0.581272 [36] training's auc: 0.755323 training's binary_logloss: 0.581955 [37] training's auc: 0.755951 training's binary_logloss: 0.582612 [38] training's auc: 0.756461 training's binary_logloss: 0.583277 [39] training's auc: 0.757106 training's binary_logloss: 0.583668 [40] training's auc: 0.758043 training's binary_logloss: 0.583681 [41] training's auc: 0.758576 training's binary_logloss: 0.584016 [42] training's auc: 0.759043 training's binary_logloss: 0.584172 [43] training's auc: 0.759703 training's binary_logloss: 0.584294 [44] training's auc: 0.760128 training's binary_logloss: 0.584415 [45] training's auc: 0.760824 training's binary_logloss: 0.584356 [46] training's auc: 0.761319 training's binary_logloss: 0.584362 [47] training's auc: 0.761861 training's binary_logloss: 0.584286 [48] training's auc: 0.762391 training's binary_logloss: 0.584165 [49] training's auc: 0.762871 training's binary_logloss: 0.584019 [50] training's auc: 0.763345 training's binary_logloss: 0.583863 [51] training's auc: 0.763871 training's binary_logloss: 0.583667 [52] training's auc: 0.764403 training's binary_logloss: 0.583398 [53] training's auc: 0.764771 training's binary_logloss: 0.583203 [54] training's auc: 0.765118 training's binary_logloss: 0.582984 [55] training's auc: 0.765679 training's binary_logloss: 0.58269 [56] training's auc: 0.766493 training's binary_logloss: 0.581969 [57] training's auc: 0.766895 training's binary_logloss: 0.58171 [58] training's auc: 0.767398 training's binary_logloss: 0.581396 [59] training's auc: 0.767757 training's binary_logloss: 0.581115 [60] training's auc: 0.768176 training's binary_logloss: 0.580823 [61] training's auc: 0.768644 training's binary_logloss: 0.580471 [62] training's auc: 0.769001 training's binary_logloss: 0.580194 [63] training's auc: 0.769432 training's binary_logloss: 0.579881 [64] training's auc: 0.769831 training's binary_logloss: 0.579578 [65] training's auc: 0.770289 training's binary_logloss: 0.579194 [66] training's auc: 0.770599 training's binary_logloss: 0.578885 [67] training's auc: 0.771015 training's binary_logloss: 0.578503 [68] training's auc: 0.771333 training's binary_logloss: 0.578228 [69] training's auc: 0.77164 training's binary_logloss: 0.577962 [70] training's auc: 0.771985 training's binary_logloss: 0.577648 [71] training's auc: 0.772285 training's binary_logloss: 0.577363 [72] training's auc: 0.772745 training's binary_logloss: 0.576955 [73] training's auc: 0.773055 training's binary_logloss: 0.576658 [74] training's auc: 0.773359 training's binary_logloss: 0.576383 [75] training's auc: 0.773719 training's binary_logloss: 0.576041 [76] training's auc: 0.774045 training's binary_logloss: 0.575749 [77] training's auc: 0.774308 training's binary_logloss: 0.57548 [78] training's auc: 0.774638 training's binary_logloss: 0.57515 [79] training's auc: 0.775002 training's binary_logloss: 0.574828 [80] training's auc: 0.775299 training's binary_logloss: 0.574493 [81] training's auc: 0.775609 training's binary_logloss: 0.574199 [82] training's auc: 0.775872 training's binary_logloss: 0.57389 [83] training's auc: 0.776177 training's binary_logloss: 0.573611 [84] training's auc: 0.776437 training's binary_logloss: 0.573345 [85] training's auc: 0.776768 training's binary_logloss: 0.57305 [86] training's auc: 0.777064 training's binary_logloss: 0.57277 [87] training's auc: 0.777333 training's binary_logloss: 0.572516 [88] training's auc: 0.777649 training's binary_logloss: 0.572201 [89] training's auc: 0.778001 training's binary_logloss: 0.571892 [90] training's auc: 0.778252 training's binary_logloss: 0.571613 [91] training's auc: 0.778634 training's binary_logloss: 0.571237 [92] training's auc: 0.778901 training's binary_logloss: 0.570969 [93] training's auc: 0.779209 training's binary_logloss: 0.570686 [94] training's auc: 0.779518 training's binary_logloss: 0.570371 [95] training's auc: 0.779781 training's binary_logloss: 0.570107 [96] training's auc: 0.779976 training's binary_logloss: 0.569892 [97] training's auc: 0.780247 training's binary_logloss: 0.569653 [98] training's auc: 0.780429 training's binary_logloss: 0.56942 [99] training's auc: 0.780727 training's binary_logloss: 0.569159 [100] training's auc: 0.780939 training's binary_logloss: 0.568939
LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LGBMClassifier(colsample_bytree=0.5, force_row_wise=True, is_unbalance=True,
max_depth=8, min_child_samples=1000, n_jobs=8, num_leaves=16,
objective='binary', verbose=-1)predproba = clf.predict_proba(X_test)
predprobapos = predproba[:,1]
print("Proba:", predprobapos)
auc_score = roc_auc_score(y_test, predprobapos)
print("AUC:", auc_score)
Proba: [0.18511533 0.33047083 0.41513223 ... 0.21674607 0.64368859 0.48100921] AUC: 0.7574614918450082
# minimiser les faux negatifs en pondérant FN et FP
from sklearn.metrics import confusion_matrix
metric_metier = {}
threshold = np.linspace(0,1,101)
for i in threshold:
print(">> Threshold", i)
predclass = predprobapos.copy()
predclass[predclass < i] = 0
predclass[predclass >= i] = 1
res = confusion_matrix(y_test, predclass)
tn, fp, fn, tp = res.ravel()
print("True negative:", tn)
print("False positive:", fp)
metric_metier[i] = (10*fn+fp)
key_min = min(metric_metier, key=metric_metier.get)
print("------------------------------------------------------")
print("Minimum:", key_min)
print("Minimum value:", metric_metier[key_min])
>> Threshold 0.0 True negative: 0 False positive: 70598 >> Threshold 0.01 True negative: 0 False positive: 70598 >> Threshold 0.02 True negative: 0 False positive: 70598 >> Threshold 0.03 True negative: 3 False positive: 70595 >> Threshold 0.04 True negative: 45 False positive: 70553 >> Threshold 0.05 True negative: 125 False positive: 70473 >> Threshold 0.06 True negative: 284 False positive: 70314 >> Threshold 0.07 True negative: 513 False positive: 70085 >> Threshold 0.08 True negative: 824 False positive: 69774 >> Threshold 0.09 True negative: 1234 False positive: 69364 >> Threshold 0.1 True negative: 1690 False positive: 68908 >> Threshold 0.11 True negative: 2234 False positive: 68364 >> Threshold 0.12 True negative: 2850 False positive: 67748 >> Threshold 0.13 True negative: 3586 False positive: 67012 >> Threshold 0.14 True negative: 4429 False positive: 66169 >> Threshold 0.15 True negative: 5280 False positive: 65318 >> Threshold 0.16 True negative: 6267 False positive: 64331 >> Threshold 0.17 True negative: 7359 False positive: 63239 >> Threshold 0.18 True negative: 8412 False positive: 62186 >> Threshold 0.19 True negative: 9618 False positive: 60980 >> Threshold 0.2 True negative: 10825 False positive: 59773 >> Threshold 0.21 True negative: 12056 False positive: 58542 >> Threshold 0.22 True negative: 13286 False positive: 57312 >> Threshold 0.23 True negative: 14607 False positive: 55991 >> Threshold 0.24 True negative: 15870 False positive: 54728 >> Threshold 0.25 True negative: 17197 False positive: 53401 >> Threshold 0.26 True negative: 18595 False positive: 52003 >> Threshold 0.27 True negative: 20043 False positive: 50555 >> Threshold 0.28 True negative: 21432 False positive: 49166 >> Threshold 0.29 True negative: 22785 False positive: 47813 >> Threshold 0.3 True negative: 24136 False positive: 46462 >> Threshold 0.31 True negative: 25523 False positive: 45075 >> Threshold 0.32 True negative: 26936 False positive: 43662 >> Threshold 0.33 True negative: 28260 False positive: 42338 >> Threshold 0.34 True negative: 29695 False positive: 40903 >> Threshold 0.35000000000000003 True negative: 31044 False positive: 39554 >> Threshold 0.36 True negative: 32421 False positive: 38177 >> Threshold 0.37 True negative: 33717 False positive: 36881 >> Threshold 0.38 True negative: 35044 False positive: 35554 >> Threshold 0.39 True negative: 36379 False positive: 34219 >> Threshold 0.4 True negative: 37659 False positive: 32939 >> Threshold 0.41000000000000003 True negative: 38907 False positive: 31691 >> Threshold 0.42 True negative: 40180 False positive: 30418 >> Threshold 0.43 True negative: 41420 False positive: 29178 >> Threshold 0.44 True negative: 42688 False positive: 27910 >> Threshold 0.45 True negative: 43868 False positive: 26730 >> Threshold 0.46 True negative: 44983 False positive: 25615 >> Threshold 0.47000000000000003 True negative: 46127 False positive: 24471 >> Threshold 0.48 True negative: 47322 False positive: 23276 >> Threshold 0.49 True negative: 48390 False positive: 22208 >> Threshold 0.5 True negative: 49440 False positive: 21158 >> Threshold 0.51 True negative: 50495 False positive: 20103 >> Threshold 0.52 True negative: 51535 False positive: 19063 >> Threshold 0.53 True negative: 52567 False positive: 18031 >> Threshold 0.54 True negative: 53514 False positive: 17084 >> Threshold 0.55 True negative: 54455 False positive: 16143 >> Threshold 0.56 True negative: 55391 False positive: 15207 >> Threshold 0.5700000000000001 True negative: 56288 False positive: 14310 >> Threshold 0.58 True negative: 57210 False positive: 13388 >> Threshold 0.59 True negative: 58076 False positive: 12522 >> Threshold 0.6 True negative: 58859 False positive: 11739 >> Threshold 0.61 True negative: 59685 False positive: 10913 >> Threshold 0.62 True negative: 60460 False positive: 10138 >> Threshold 0.63 True negative: 61221 False positive: 9377 >> Threshold 0.64 True negative: 61927 False positive: 8671 >> Threshold 0.65 True negative: 62617 False positive: 7981 >> Threshold 0.66 True negative: 63270 False positive: 7328 >> Threshold 0.67 True negative: 63881 False positive: 6717 >> Threshold 0.68 True negative: 64521 False positive: 6077 >> Threshold 0.6900000000000001 True negative: 65109 False positive: 5489 >> Threshold 0.7000000000000001 True negative: 65626 False positive: 4972 >> Threshold 0.71 True negative: 66133 False positive: 4465 >> Threshold 0.72 True negative: 66596 False positive: 4002 >> Threshold 0.73 True negative: 67024 False positive: 3574 >> Threshold 0.74 True negative: 67491 False positive: 3107 >> Threshold 0.75 True negative: 67903 False positive: 2695 >> Threshold 0.76 True negative: 68235 False positive: 2363 >> Threshold 0.77 True negative: 68565 False positive: 2033 >> Threshold 0.78 True negative: 68862 False positive: 1736 >> Threshold 0.79 True negative: 69178 False positive: 1420 >> Threshold 0.8 True negative: 69421 False positive: 1177 >> Threshold 0.81 True negative: 69671 False positive: 927 >> Threshold 0.8200000000000001 True negative: 69825 False positive: 773 >> Threshold 0.8300000000000001 True negative: 69987 False positive: 611 >> Threshold 0.84 True negative: 70137 False positive: 461 >> Threshold 0.85 True negative: 70257 False positive: 341 >> Threshold 0.86 True negative: 70358 False positive: 240 >> Threshold 0.87 True negative: 70440 False positive: 158 >> Threshold 0.88 True negative: 70490 False positive: 108 >> Threshold 0.89 True negative: 70532 False positive: 66 >> Threshold 0.9 True negative: 70569 False positive: 29 >> Threshold 0.91 True negative: 70587 False positive: 11 >> Threshold 0.92 True negative: 70597 False positive: 1 >> Threshold 0.93 True negative: 70598 False positive: 0 >> Threshold 0.9400000000000001 True negative: 70598 False positive: 0 >> Threshold 0.9500000000000001 True negative: 70598 False positive: 0 >> Threshold 0.96 True negative: 70598 False positive: 0 >> Threshold 0.97 True negative: 70598 False positive: 0 >> Threshold 0.98 True negative: 70598 False positive: 0 >> Threshold 0.99 True negative: 70598 False positive: 0 >> Threshold 1.0 True negative: 70598 False positive: 0 ------------------------------------------------------ Minimum: 0.51 Minimum value: 40853
# minimiser les faux negatifs en utilisant le fbeta score
from sklearn.metrics import fbeta_score
metric_metier = {}
threshold = np.linspace(0,1,101)
for i in threshold:
print(">> Threshold", i)
predclass = predprobapos.copy()
predclass[predclass < i] = 0
predclass[predclass >= i] = 1
res = fbeta_score(y_test, predclass, beta=2)
metric_metier[i] = res
key_max = max(metric_metier, key=metric_metier.get)
print("------------------------------------------------------")
print("Maximum:", key_max)
print("Maximum value:", metric_metier[key_max])
>> Threshold 0.0 >> Threshold 0.01 >> Threshold 0.02 >> Threshold 0.03 >> Threshold 0.04 >> Threshold 0.05 >> Threshold 0.06 >> Threshold 0.07 >> Threshold 0.08 >> Threshold 0.09 >> Threshold 0.1 >> Threshold 0.11 >> Threshold 0.12 >> Threshold 0.13 >> Threshold 0.14 >> Threshold 0.15 >> Threshold 0.16 >> Threshold 0.17 >> Threshold 0.18 >> Threshold 0.19 >> Threshold 0.2 >> Threshold 0.21 >> Threshold 0.22 >> Threshold 0.23 >> Threshold 0.24 >> Threshold 0.25 >> Threshold 0.26 >> Threshold 0.27 >> Threshold 0.28 >> Threshold 0.29 >> Threshold 0.3 >> Threshold 0.31 >> Threshold 0.32 >> Threshold 0.33 >> Threshold 0.34 >> Threshold 0.35000000000000003 >> Threshold 0.36 >> Threshold 0.37 >> Threshold 0.38 >> Threshold 0.39 >> Threshold 0.4 >> Threshold 0.41000000000000003 >> Threshold 0.42 >> Threshold 0.43 >> Threshold 0.44 >> Threshold 0.45 >> Threshold 0.46 >> Threshold 0.47000000000000003 >> Threshold 0.48 >> Threshold 0.49 >> Threshold 0.5 >> Threshold 0.51 >> Threshold 0.52 >> Threshold 0.53 >> Threshold 0.54 >> Threshold 0.55 >> Threshold 0.56 >> Threshold 0.5700000000000001 >> Threshold 0.58 >> Threshold 0.59 >> Threshold 0.6 >> Threshold 0.61 >> Threshold 0.62 >> Threshold 0.63 >> Threshold 0.64 >> Threshold 0.65 >> Threshold 0.66 >> Threshold 0.67 >> Threshold 0.68 >> Threshold 0.6900000000000001 >> Threshold 0.7000000000000001 >> Threshold 0.71 >> Threshold 0.72 >> Threshold 0.73 >> Threshold 0.74 >> Threshold 0.75 >> Threshold 0.76 >> Threshold 0.77 >> Threshold 0.78 >> Threshold 0.79 >> Threshold 0.8 >> Threshold 0.81 >> Threshold 0.8200000000000001 >> Threshold 0.8300000000000001 >> Threshold 0.84 >> Threshold 0.85 >> Threshold 0.86 >> Threshold 0.87 >> Threshold 0.88 >> Threshold 0.89 >> Threshold 0.9 >> Threshold 0.91 >> Threshold 0.92 >> Threshold 0.93 >> Threshold 0.9400000000000001 >> Threshold 0.9500000000000001 >> Threshold 0.96 >> Threshold 0.97 >> Threshold 0.98 >> Threshold 0.99 >> Threshold 1.0 ------------------------------------------------------ Maximum: 0.49 Maximum value: 0.4255668747946106
!jupyter nbconvert --config nbconvert/config_html.py
[NbConvertApp] Converting notebook P7_01_notebookexploration.ipynb to html
[NbConvertApp] Writing 4623390 bytes to P7_01_notebookexploration.html
[NbConvertApp] Converting notebook P7_02_notebookscoring.ipynb to html
[NbConvertApp] Writing 4437532 bytes to P7_02_notebookscoring.html
[NbConvertApp] Converting notebook P7_03_notebookdashboard.ipynb to html
C:\Users\flori\miniconda3\envs\jupyter\lib\site-packages\nbconvert\filters\datatypefilter.py:39: UserWarning: Your element with mimetype(s) dict_keys(['application/vnd.plotly.v1+json']) is not able to be represented.
warn("Your element with mimetype(s) {mimetypes}"
[NbConvertApp] Writing 901023 bytes to P7_03_notebookdashboard.html